# Replicates Oneal and Russett 1999 World Politics, Table 1, column 2
# Thursday, 30.June.2005 14:13
# Saturday, July 23, 2005 at 3:35 pm
# Tuesday, 26.July.2005 at 12:34 pm
# re-engineered to do a cross-validation

wd<- c("C:/Documents and Settings/Administrator/My Documents/My Papers/AJPS 2006/AJPS POOL/OR-cv")
setwd(wd)

library(foreign)

ordat<-read.dta("mdwro.dta")

ordat<-order[ordat$dyadid,] # good idea to order data frame on grouping variable for gee

# /*equation 1 with peaceyrs: col 2, Table 1*/
or.repl<-glm(dispute1~smldmat+smldep+smigoabi+lcaprat2+allies+contigkb+logdstab+majpower
                      +peaceyr1+peaceyr2+peaceyr3+peaceyr4,
                      family=binomial,data=ordat)
# /* These results are exactly what are reported in O&R (1999, Table 1, column 1) */

library(nlme)
library(lattice)
library(geepack)
library(doBy)
library(locfit)


ordat$statea<-as.factor(ordat$statea)
ordat$stateb<-as.factor(ordat$stateb)
ordat$dyadid<-as.factor(ordat$dyadid)

infitset<-sample(rownames(ordat),size=149404/2)
totalset<-rownames(ordat)
intestset<-setdiff(totalset,infitset)
ordat.fitset<-ordat[infitset,]
ordat.testset<-ordat[intestset,]


f<-formula(dispute1~ smldmat+smldep+smigoabi+lcaprat2+allies+contigkb+logdstab+majpower+peaceyr1+peaceyr2+peaceyr3+peaceyr4)
or.fitset<-geeglm(f, corstr="ar1",id=dyadid, family=binomial(logit), data=ordat.fitset,std.err="san.se")
summary(or.fitset)

#now let's get the coefficients


ones<-rep(1,length(intestset))
testivs<-ordat.testset[,c(28,30,21,12,8,10,11,32,24,25,26,27)]


coef(or.fitset)
attach(ordat.testset)
eta<-                             
 -1.42152704  -0.06695258*smldmat -28.26031552*smldep +   0.01634213*smigoabi   
 -0.17512662*lcaprat2  -0.39073210*allies + 
  1.55390753*contigkb  -0.39012752*logdstab +   
  1.69889500*majpower  -0.10525352*peaceyr1 +   
  0.16741646*peaceyr2   -0.05885434  *peaceyr3      
-25.01190586 *peaceyr4

pi<-plogis(eta)
testcv<-cbind(ordat.testset$dispute1,pi)

library(verification)
library(ROCR)
# now let's do a roc plot
rocr.pred<-prediction(pi,ordat.testset$dispute1)
rocr.perf<-performance(rocr.pred,"tpr","fpr")

plot(rocr.perf,col="lightblue",lwd=2) # In sample

dput(rocr.perf,"rocr.perf.or5050")

names(ordat.testset)
ones<-rep(1,length(intestset))
testivs<-ordat.testset[,c(28,30,21,12,8,10,11,32,)]
  "statea"   "stateb"   "year"     "dependa"  "dependb"  "demauta" 
 [7] "demautb"  "allies"   "dispute1" "contigkb" "logdstab" "lcaprat2"
[13] "hegpower" "hegdefb"  "avgdmat"  "sddmat"   "avgdep"   "sddep"   
[19] "avgigos"  "sdigos"   "smigoabi" "statusqa" "statusqb" "peaceyr1"
[25] "peaceyr2" "peaceyr3" "peaceyr4" "smldmat"  "lrgdmat"  "smldep"  
[31] "lrgdep"   "majpower" "dyadid"  


# flip it and grip it:
no.dispute<-ordat$dispute1
no.dispute[ordat$dispute1==1]<-0
no.dispute[ordat$dispute1==0]<-1
f<-formula(no.dispute~ smldmat+smldep+smigoabi+lcaprat2+allies+contigkb+logdstab+majpower)
no.dispute.fit<-geeglm(f, corstr="ar1",id=dyadid, family=binomial(logit), data=ordat,std.err="san.se")
summary(no.dispute.fit)
